?

1 Preparations and Data

1.0 Required Packages

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(Seurat)
library(ggplot2)

packageVersion("dplyr")
[1] '0.8.99.9003'
packageVersion("Seurat")
[1] '3.1.5'

1.1 Available Data Sets

1.2 Load Data

CITE_GE_HTO_010.data <- Read10X(data.dir = "/home/david.mentrup/ProjectCITE/data/Run01/10X_20_OK_05_R59/cellranger/10X_20_010_GE_HTO/outs/filtered_feature_bc_matrix/")
10X data contains more than one type and is being returned as a list containing matrices of each type.
CITE_GE_HTO_011.data <- Read10X(data.dir = "/home/david.mentrup/ProjectCITE/data/Run01/10X_20_OK_05_R59/cellranger/10X_20_011_GE_HTO/outs/filtered_feature_bc_matrix/")
10X data contains more than one type and is being returned as a list containing matrices of each type.
CITE_GE_Epi_010.data <- Read10X(data.dir = "/home/david.mentrup/ProjectCITE/data/Run01/10X_20_OK_05_R59/cellranger/10X_20_010_GE_Epi/outs/filtered_feature_bc_matrix/", gene.column=1)
10X data contains more than one type and is being returned as a list containing matrices of each type.
# Names for Hashs/Epitopes corrected:
rownames(x = CITE_GE_HTO_010.data[["Antibody Capture"]]) <- c("Pat1-R1-H1","Pat1-Ven-R1-H2", "HL60-R1-H3")

rownames(x = CITE_GE_HTO_011.data[["Antibody Capture"]]) <- c("HL60-R2-H3")

rownames(x = CITE_GE_Epi_010.data[["Antibody Capture"]]) <- gsub(pattern ="_[control_]*TotalSeqA", replacement = "", x = rownames(x = CITE_GE_Epi_010.data[["Antibody Capture"]]))

Create Seurat Object

CITE_GE_HTO_010 <- CreateSeuratObject(counts = CITE_GE_HTO_010.data[["Gene Expression"]], min.cells = 3, min.features = 0)
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
CITE_GE_HTO_010[["HTO"]] <- CreateAssayObject(CITE_GE_HTO_010.data[["Antibody Capture"]])
CITE_GE_HTO_010[["Epi"]] <- CreateAssayObject(CITE_GE_Epi_010.data[["Antibody Capture"]])
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
CITE_GE_HTO_010
An object of class Seurat 
19524 features across 7062 samples within 3 assays 
Active assay: RNA (19510 features, 0 variable features)
 2 other assays present: HTO, Epi
median(CITE_GE_HTO_010$nCount_RNA)
[1] 13242
median(CITE_GE_HTO_010$nFeature_RNA)
[1] 3389.5
median(CITE_GE_HTO_010$nCount_HTO)
[1] 778
median(CITE_GE_HTO_010$nCount_Epi)
[1] 211

This sample is the control experiment without epitope labelling

CITE_GE_HTO_011 <- CreateSeuratObject(counts = CITE_GE_HTO_011.data[["Gene Expression"]], min.cells = 3, min.features = 0)
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
CITE_GE_HTO_011[["HTO"]] <- CreateAssayObject(CITE_GE_HTO_011.data[["Antibody Capture"]])
CITE_GE_HTO_011
An object of class Seurat 
18003 features across 3769 samples within 2 assays 
Active assay: RNA (18002 features, 0 variable features)
 1 other assay present: HTO
median(CITE_GE_HTO_011$nCount_RNA)
[1] 14149
median(CITE_GE_HTO_011$nFeature_RNA)
[1] 3628
median(CITE_GE_HTO_011$nCount_HTO)
[1] 40

Control and analyze RNA data

Standart quality control and filtering

#Mitochondrial genes selected 
CITE_GE_HTO_010[["percent.mt"]] <- PercentageFeatureSet(CITE_GE_HTO_010, pattern = "^MT-")
CITE_GE_HTO_011[["percent.mt"]] <- PercentageFeatureSet(CITE_GE_HTO_011, pattern = "^MT-")
# Visualize as a violin plot
# Visualize QC metrics as a violin plot
VlnPlot(CITE_GE_HTO_010, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

VlnPlot(CITE_GE_HTO_011, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

The aggregated correlation of amount of RNA and mitochondrial RNA as well as number of Feature is plotted as a further QC

plot1 <- FeatureScatter(CITE_GE_HTO_010, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plot2 <- FeatureScatter(CITE_GE_HTO_010, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
CombinePlots(plots = list(plot1, plot2))
Warning: CombinePlots is being deprecated. Plots should now be combined using
the patchwork system.

plot1 <- FeatureScatter(CITE_GE_HTO_011, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plot2 <- FeatureScatter(CITE_GE_HTO_011, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
CombinePlots(plots = list(plot1, plot2))
Warning: CombinePlots is being deprecated. Plots should now be combined using
the patchwork system.

We filter for mitochodrial genes (>20%) as well as all cells below 600 features out as well

CITE_GE_HTO_010 <- subset(CITE_GE_HTO_010, subset = nFeature_RNA > 600 & percent.mt <20)
CITE_GE_HTO_011 <- subset(CITE_GE_HTO_011, subset = nFeature_RNA > 600 & percent.mt <20)

Gene expression processing Sample 010 (S01_CITE)

CITE_GE_HTO_010<- NormalizeData(CITE_GE_HTO_010)
CITE_GE_HTO_010<- FindVariableFeatures(CITE_GE_HTO_010)
CITE_GE_HTO_010<- ScaleData(CITE_GE_HTO_010)
Centering and scaling data matrix
CITE_GE_HTO_010<- RunPCA(CITE_GE_HTO_010)
PC_ 1 
Positive:  IL8, SAT1, THBS1, BTG1, IER3, SLC7A11, KYNU, SOCS3, NAMPT, C15orf48 
       PLAUR, CXCL3, SOD2, PHLDA1, NFKBIZ, NEU4, IL1B, CREM, CXCL2, PDE4DIP 
       MXD1, MS4A7, DUSP1, BCL2A1, INHBA, TFPI, POU2F2, LGALS3, ABCA1, PTPRE 
Negative:  PTMA, TUBA1B, NPM1, TYMS, SNRPD1, DUT, STMN1, HMGB1, UQCRH, RANBP1 
       KIAA0101, PA2G4, MIF, LDHB, NUCB2, FBL, SNRPF, GCSH, SLC29A1, NME1 
       PPA1, PPP1R27, PPIA, HSPD1, TIMM13, RPL27A, HSPE1, PAICS, TUBB, RNASEH2B 
PC_ 2 
Positive:  RPS2, RPL14, RPL21, RPL13A, RPS14, RPS5, RPSA, EGFL7, NCOA7, SOX4 
       SPINK2, RPL4, IL1RL1, GUCY1A3, GNB2L1, SLC40A1, GBP4, SOCS2, TFPI, RPS11 
       GCSAML, AQP3, FSCN1, IGFBP2, HLA-DPB1, CPA3, HLA-DPA1, NPM1, CYTL1, PEBP1 
Negative:  EPB41L3, FCER1G, CYBB, CD14, CD93, ANXA5, S100A9, PLA2G7, MMP14, RAB31 
       PILRA, NCF2, HNMT, MPEG1, S100A8, LYZ, MS4A7, VCAN, NCF1, S100A10 
       SH3BP5, CXCL16, SERPINB2, SERPINA1, RBM47, CTB-61M7.2, PID1, CD300E, S100A12, CTSL 
PC_ 3 
Positive:  CAMK1, CD4, RNASET2, APP, PLAC8, RP11-1112C15.1, CST7, TGM5, LSP1, NR2F2 
       AL589743.1, PPP1R27, FAM107B, HOXC10, IGF2BP3, TRIP6, IGF2BP1, C1orf228, MNDA, NRN1 
       CD48, EVL, LRP3, MPO, LIN7A, MEIS2, RPL13A, S100B, CEBPE, OSBPL1A 
Negative:  CLSPN, HELLS, MCM10, CCNE2, CHEK1, DTL, PCNA, CPA3, XRCC2, UNG 
       IGFBP2, FEN1, BRCA1, MCM6, FANCI, HSPA1A, MCM2, ATAD5, CENPU, KIAA0101 
       MCM5, CDC6, TIMP1, HSPA8, RAD51, RRM1, TK1, NCOA7, CDT1, TYMS 
PC_ 4 
Positive:  CST3, S100A4, ITGB2, ALOX5AP, CAPG, CORO1A, ITM2B, AMICA1, ICAM3, HLA-DMA 
       HLA-DPA1, SCPEP1, TNNI2, HLA-DPB1, TIMP1, ARHGDIB, NCF1, CD52, S100B, SAMHD1 
       AIF1, COTL1, C1orf162, ADORA3, TOMM7, PAK1, PLXDC2, SIGLEC7, CD101, HLA-DQB1 
Negative:  INHBA, NEU4, PHLDA1, TSC22D1, IL11, CXCL3, CXCL2, NAMPT, BTG1, ATP2B1 
       DDIT3, SAT1, DDIT4, JUN, CTNNAL1, CXCL1, CHST7, LINC00936, NR4A3, CREM 
       TRAF1, IL24, ATF3, DUSP1, IL8, IRAK2, TNFAIP6, SOCS3, MAFF, MAP4K4 
PC_ 5 
Positive:  YBX3, IL8, CYCS, ATP5B, HSP90AB1, MIR155HG, COX5A, PSMB7, INHBA, SRM 
       CXCL3, CXCL2, UQCRFS1, PSMB1, GTSF1, UBE2M, YBX1, NOP16, NME1, RPL7L1 
       EBNA1BP2, TIMM13, SAT1, NAMPT, NEU4, HSPD1, IL1B, TFPI, CXCL1, PSMB2 
Negative:  GIMAP7, CD3D, CD2, SPOCK2, CXCR4, GIMAP4, CD3G, CD3E, CD7, SELL 
       PRDX2, CRIP1, SESN3, GZMM, IL7R, ARL4C, EVL, CCR7, CD6, CD48 
       CLIC3, TRAT1, KLF2, LBH, ABLIM1, LTB, CD27, TSHZ2, CDK1, GNLY 
ElbowPlot(CITE_GE_HTO_010, ndims = 50)

CITE_GE_HTO_010 <- RunUMAP(CITE_GE_HTO_010, dims = 1:25)
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
16:50:14 UMAP embedding parameters a = 0.9922 b = 1.112
16:50:14 Read 6505 rows and found 25 numeric columns
16:50:14 Using Annoy for neighbor search, n_neighbors = 30
16:50:14 Building Annoy index with metric = cosine, n_trees = 50
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
16:50:16 Writing NN index file to temp file /tmp/Rtmp9rQSNO/file1acbf121d7d45
16:50:16 Searching Annoy index using 1 thread, search_k = 3000
16:50:17 Annoy recall = 100%
16:50:18 Commencing smooth kNN distance calibration using 1 thread
16:50:18 Initializing from normalized Laplacian + noise
16:50:19 Commencing optimization for 500 epochs, with 267946 positive edges
16:50:43 Optimization finished
CITE_GE_HTO_010 <- FindNeighbors(CITE_GE_HTO_010, dims = 1:25)
Computing nearest neighbor graph
Computing SNN
CITE_GE_HTO_010 <- FindClusters(CITE_GE_HTO_010, resolution = 0.6, algorithm = 3)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 6505
Number of edges: 227543

Running smart local moving algorithm...
Maximum modularity in 10 random starts: 0.8810
Number of communities: 12
Elapsed time: 3 seconds
CITE_GE_HTO_010@meta.data$RNA_cluster <-CITE_GE_HTO_010@active.ident
DimPlot(CITE_GE_HTO_010, label = TRUE, label.size = 10) + NoLegend()

For Sample 011 (S01_noCITE)

CITE_GE_HTO_011<- NormalizeData(CITE_GE_HTO_011)
CITE_GE_HTO_011<- FindVariableFeatures(CITE_GE_HTO_011)
CITE_GE_HTO_011<- ScaleData(CITE_GE_HTO_011)
Centering and scaling data matrix
CITE_GE_HTO_011<- RunPCA(CITE_GE_HTO_011)
PC_ 1 
Positive:  IL8, THBS1, SAT1, BTG1, IER3, SLC7A11, NAMPT, KYNU, PLAUR, C15orf48 
       DUSP1, NFKBIZ, FOSL2, CXCL3, MXD1, SOD2, JUN, MS4A7, PHLDA1, SLC43A2 
       SOCS3, IL1B, CXCL2, MMP14, SH3BP5, MAFB, CD14, SERPINB2, CTB-61M7.2, ABCA1 
Negative:  PTMA, NPM1, SNRPD1, DUT, TYMS, TUBA1B, RANBP1, LDHB, STMN1, NME1 
       KIAA0101, HSPE1, PPA1, UQCRH, PA2G4, FBL, HSPD1, NHP2, GCSH, PAICS 
       RPS2, RPSA, HMGB1, SRM, HSP90AB1, ATP5G1, SNRPF, RPS14, RPS5, RAN 
PC_ 2 
Positive:  RPL18A, RPL5, RPL13A, RPL36, RPS2, RPL21, RPL23A, RPS15, RPS8, RPL14 
       RPS5, RPS14, RPSA, RPLP2, SOX4, RPL4, TFPI, RPS28, EGFL7, GNB2L1 
       GBP4, NCOA7, SLC45A3, GUCY1A3, RPS25, IL1RL1, RPS11, SPINK2, SLC40A1, FSCN1 
Negative:  EPB41L3, FCER1G, CD93, S100A9, ANXA5, CYBB, CD14, PLA2G7, RAB31, LYZ 
       NCF2, PILRA, MMP14, S100A10, CTB-61M7.2, MPEG1, S100A8, NCF1, HNMT, MT2A 
       CD300E, VCAN, SERPINA1, MS4A7, SERPINB2, SH3BP5, CXCL16, PID1, RBM47, CYP27A1 
PC_ 3 
Positive:  CLSPN, GINS2, HELLS, E2F1, MCM10, MCM4, DTL, CCNE2, CENPK, CHEK1 
       PCNA, CPA3, MCM6, MCM2, XRCC2, CSF2RB, IGFBP2, UNG, HSPA8, SLC45A3 
       ATAD5, BRCA1, TIMP1, CHAF1A, FEN1, HSPA1A, EXO1, MCM5, CDT1, FANCI 
Negative:  PPP1R27, NR2F2, APP, CAMK1, CST7, MPO, IGF2BP1, RP11-1112C15.1, CD4, LRP3 
       AL589743.1, PLAC8, RNASET2, TRIP6, CEBPE, HOXC10, LIN7A, CTSG, PRTN3, C1orf228 
       TGM5, IGF2BP3, MNDA, AZU1, MEIS2, IGFBP7, CORO2A, NRN1, KCNN2, RNF165 
PC_ 4 
Positive:  IL11, NEU4, TSC22D1, PHLDA1, INHBA, DDIT3, CTNNAL1, ATP2B1, CXCL2, CXCL3 
       NAMPT, BTG1, ATF3, SAT1, FOSL2, NRIP3, CREM, LINC00936, IL24, DDIT4 
       USP53, JUN, TUBB4B, RBKS, CDK1, CHST7, CXCL1, NR4A3, ABCA1, TOP2A 
Negative:  GSN, S100A4, CST3, CORO1A, LST1, AMICA1, TSPO, HLA-DMA, ARHGDIB, ITGB2 
       HLA-DPA1, ITM2B, HLA-DPB1, CAPG, ALOX5AP, HLA-DQB1, CFP, ICAM3, FGL2, HLA-DMB 
       AIF1, CD52, SAMHD1, CD9, PLXDC2, SORL1, CD180, NCF1, LAT2, HLA-DQA1 
PC_ 5 
Positive:  YBX3, CXCL1, IL8, CYCS, CXCL2, CXCL3, IL6, HSP90AB1, RPF2, MIR155HG 
       IL1B, SRM, ITGB8, TXN, MARCKS, CCL20, RPL7L1, IL1A, NME1, PSMB7 
       EBNA1BP2, NAMPT, CXCL5, TIMM13, CCL3, BCAT1, HSPE1, IL24, NOP16, TNFAIP6 
Negative:  GIMAP7, SYNE2, GIMAP4, CD3D, CD2, SPOCK2, CXCR4, CLIC3, GZMM, CRIP1 
       EVL, LCK, CD3E, CD3G, CDK1, GNLY, SAMD3, LSP1, PRDX2, LBH 
       HMGB2, NCAPG, NDC80, RRM2, ITM2B, UBE2C, SPC25, SLA, TOP2A, KIFC1 
ElbowPlot(CITE_GE_HTO_011, ndims = 50)

CITE_GE_HTO_011 <- RunUMAP(CITE_GE_HTO_011, dims = 1:25)
16:50:55 UMAP embedding parameters a = 0.9922 b = 1.112
16:50:55 Read 3515 rows and found 25 numeric columns
16:50:55 Using Annoy for neighbor search, n_neighbors = 30
16:50:55 Building Annoy index with metric = cosine, n_trees = 50
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
16:50:56 Writing NN index file to temp file /tmp/Rtmp9rQSNO/file1acbf22a4ebd7
16:50:56 Searching Annoy index using 1 thread, search_k = 3000
16:50:57 Annoy recall = 100%
16:50:57 Commencing smooth kNN distance calibration using 1 thread
16:50:58 Initializing from normalized Laplacian + noise
16:50:58 Commencing optimization for 500 epochs, with 141290 positive edges
16:51:11 Optimization finished
CITE_GE_HTO_011 <- FindNeighbors(CITE_GE_HTO_011, dims = 1:25)
Computing nearest neighbor graph
Computing SNN
CITE_GE_HTO_011 <- FindClusters(CITE_GE_HTO_011, resolution = 0.6, algorithm = 3)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 3515
Number of edges: 117662

Running smart local moving algorithm...
Maximum modularity in 10 random starts: 0.8660
Number of communities: 12
Elapsed time: 1 seconds
CITE_GE_HTO_011@meta.data$RNA_cluster <-CITE_GE_HTO_011@active.ident
DimPlot(CITE_GE_HTO_011, label = TRUE, label.size = 10) + NoLegend()

Evaluating Hash-labels:

Sample 010 /S01_CITE

CITE_GE_HTO_010 <- NormalizeData(CITE_GE_HTO_010, assay = "HTO", normalization.method = "CLR")
Normalizing across features
CITE_GE_HTO_010 <- ScaleData(CITE_GE_HTO_010, assay = "HTO")
Centering and scaling data matrix

The demultiplexing of the Spike-in (HL60) is feasible:

FeaturePlot(CITE_GE_HTO_010, features = c("Pat1-R1-H1","Pat1-Ven-R1-H2", "HL60-R1-H3"))
Warning: Could not find Pat1-R1-H1 in the default search locations, found in HTO
assay instead
Warning: Could not find Pat1-Ven-R1-H2 in the default search locations, found in
HTO assay instead
Warning: Could not find HL60-R1-H3 in the default search locations, found in HTO
assay instead

VlnPlot(CITE_GE_HTO_010, assay= "HTO",features = c("Pat1-R1-H1","Pat1-Ven-R1-H2", "HL60-R1-H3"), ncol = 2, pt.size = 0.1) + NoLegend()

VlnPlot(CITE_GE_HTO_010, features = c("Pat1-R1-H1","Pat1-Ven-R1-H2", "HL60-R1-H3"), ncol = 2, pt.size = 0.1,  group.by = "orig.ident") + NoLegend()
Warning: Could not find Pat1-R1-H1 in the default search locations, found in HTO
assay instead
Warning: Could not find Pat1-Ven-R1-H2 in the default search locations, found in
HTO assay instead
Warning: Could not find HL60-R1-H3 in the default search locations, found in HTO
assay instead

FeatureScatter(CITE_GE_HTO_010, feature1 = "hto_Pat1-R1-H1", feature2 =  "hto_Pat1-Ven-R1-H2",group.by = "orig.ident")

FeatureScatter(CITE_GE_HTO_010, feature1 = "hto_Pat1-R1-H1", feature2 =  "hto_HL60-R1-H3",group.by = "orig.ident") + geom_hline(yintercept=1.5) + NoLegend()

RidgePlot(CITE_GE_HTO_010, features = "hto_HL60-R1-H3", y.max = 0.01, group.by = "orig.ident")+ theme(axis.line.y = element_line())
Picking joint bandwidth of 0.0434

VlnPlot(CITE_GE_HTO_010, features = c("hto_HL60-R1-H3"), ncol = 1, pt.size = 0.1,  group.by = "orig.ident", y.max =6) + NoLegend() + geom_hline(yintercept=1.5)
Warning: Removed 13 rows containing non-finite values (stat_ydensity).
Warning: Removed 13 rows containing missing values (geom_point).

RidgePlot(CITE_GE_HTO_010, features = "Pat1-R1-H1", y.max = 0.01, group.by = "orig.ident")+ theme(axis.line.y = element_line())
Warning: Could not find Pat1-R1-H1 in the default search locations, found in HTO
assay instead
Picking joint bandwidth of 0.0623

VlnPlot(CITE_GE_HTO_010, features = c("Pat1-R1-H1"), ncol = 1, pt.size = 0.1,  group.by = "orig.ident", y.max =2) + NoLegend() + geom_hline(yintercept=0.75)
Warning: Could not find Pat1-R1-H1 in the default search locations, found in HTO
assay instead
Warning: Removed 372 rows containing non-finite values (stat_ydensity).
Warning: Removed 372 rows containing missing values (geom_point).

RidgePlot(CITE_GE_HTO_010, features = "hto_Pat1-Ven-R1-H2", y.max = 0.01, group.by = "orig.ident")+ theme(axis.line.y = element_line())
Picking joint bandwidth of 0.0773

VlnPlot(CITE_GE_HTO_010, features = c("hto_Pat1-Ven-R1-H2"), ncol = 1, pt.size = 0.1,  group.by = "orig.ident", y.max =2) + NoLegend() + geom_hline(yintercept=0.55)
Warning: Removed 389 rows containing non-finite values (stat_ydensity).
Warning: Removed 389 rows containing missing values (geom_point).

This is a preliminary demultiplexing trial, final demultiplexing was conducted after a restrictive removal of the cell line

DefaultAssay(object = CITE_GE_HTO_010) <- "HTO"

CITE_GE_HTO_010@meta.data$Sample <- 'Unknown'
CITE_GE_HTO_010@meta.data[WhichCells(CITE_GE_HTO_010, expression = `HL60-R1-H3` > 1.5) ,"Sample"] <- "HL60_R1_H3-Duplet"
CITE_GE_HTO_010@meta.data[WhichCells(CITE_GE_HTO_010, expression = `HL60-R1-H3` > 1.5 & `Pat1-Ven-R1-H2` <0.55 & `Pat1-R1-H1` <0.75),"Sample"] <- "HL60_R1_H3"
CITE_GE_HTO_010@meta.data[WhichCells(CITE_GE_HTO_010, expression = `HL60-R1-H3` < 1.5 & `Pat1-Ven-R1-H2` >0.55 & `Pat1-R1-H1` <0.75),"Sample"] <- "Pat1_Ven_R1_H2"
CITE_GE_HTO_010@meta.data[WhichCells(CITE_GE_HTO_010, expression = `HL60-R1-H3` < 1.5 & `Pat1-Ven-R1-H2` <0.55 & `Pat1-R1-H1` >0.75),"Sample"] <- "Pat1_R1_H1"
DefaultAssay(object = CITE_GE_HTO_010) <- "RNA"
FeatureScatter(CITE_GE_HTO_010, feature1 = "hto_Pat1-R1-H1", feature2 =  "hto_Pat1-Ven-R1-H2",group.by = "Sample")

 table(CITE_GE_HTO_010@meta.data$Sample)

       HL60_R1_H3 HL60_R1_H3-Duplet        Pat1_R1_H1    Pat1_Ven_R1_H2 
              887               244              1435              3077 
          Unknown 
              862 

A proper demultiplexing was conducted in the RMarkdown “Processing and annotation of S01_CITE” after removal of the cell line

Sample 011

CITE_GE_HTO_011 <- NormalizeData(CITE_GE_HTO_011, assay = "HTO", normalization.method = "CLR")
Normalizing across features
CITE_GE_HTO_011 <- ScaleData(CITE_GE_HTO_011, assay = "HTO")
Centering and scaling data matrix
FeaturePlot(CITE_GE_HTO_011, features = c("HL60-R2-H3"))
Warning: Could not find HL60-R2-H3 in the default search locations, found in HTO
assay instead

VlnPlot(CITE_GE_HTO_011, assay= "HTO",features = c("HL60-R2-H3"), ncol = 1, pt.size = 0.1) + NoLegend()

VlnPlot(CITE_GE_HTO_011, assay= "HTO",features = c("HL60-R2-H3"), ncol = 1, pt.size = 0.1,  group.by = "orig.ident") + NoLegend()

RidgePlot(CITE_GE_HTO_011, features = "HL60-R2-H3", y.max = 0.01, group.by = "orig.ident")+ theme(axis.line.y = element_line())
Warning: Could not find HL60-R2-H3 in the default search locations, found in HTO
assay instead
Picking joint bandwidth of 0.0379

VlnPlot(CITE_GE_HTO_011, features = c("HL60-R2-H3"), ncol = 1, pt.size = 0.1,  group.by = "orig.ident", y.max =6) + NoLegend() + geom_hline(yintercept=2)
Warning: Could not find HL60-R2-H3 in the default search locations, found in HTO
assay instead
Warning: Removed 32 rows containing non-finite values (stat_ydensity).
Warning: Removed 32 rows containing missing values (geom_point).

DefaultAssay(object = CITE_GE_HTO_011) <- "HTO"

CITE_GE_HTO_011@meta.data$Sample <- 'Unknown'
CITE_GE_HTO_011@meta.data[WhichCells(CITE_GE_HTO_011, expression = `HL60-R2-H3` > 2) ,"Sample"] <- "HL60_R2_H3"
DefaultAssay(object = CITE_GE_HTO_011) <- "RNA"

Evaluation epitope labels

CITE_GE_HTO_010 <- NormalizeData(CITE_GE_HTO_010, assay = "Epi", normalization.method = "CLR")
Normalizing across features
CITE_GE_HTO_010 <- ScaleData(CITE_GE_HTO_010, assay = "Epi")
Centering and scaling data matrix
VlnPlot(CITE_GE_HTO_010, assay= "Epi", features = c("CD19","CD3","CD16","CD4","CD11c","CD56-NCAM","CD14","CD8","CD45","CD34","CD15"), ncol = 3, pt.size = 0.1,group.by ="orig.ident" ) + NoLegend()

FeaturePlot(CITE_GE_HTO_010, features = c("epi_CD19","CD3","CD16","epi_CD4","CD11c","CD56-NCAM","epi_CD14","CD8","CD45","CD34","CD15" ))
Warning: Could not find CD3 in the default search locations, found in Epi assay
instead
Warning: Could not find CD16 in the default search locations, found in Epi assay
instead
Warning: Could not find CD11c in the default search locations, found in Epi
assay instead
Warning: Could not find CD56-NCAM in the default search locations, found in Epi
assay instead
Warning: Could not find CD8 in the default search locations, found in Epi assay
instead
Warning: Could not find CD45 in the default search locations, found in Epi assay
instead
Warning: Could not find CD34 in the default search locations, found in Epi assay
instead
Warning: Could not find CD15 in the default search locations, found in Epi assay
instead

VlnPlot(CITE_GE_HTO_010, assay= "Epi", features = c("CD19","CD3","CD16","CD4","CD11c","CD56-NCAM","CD14","CD8","CD45","CD34","CD15" ), ncol = 3, pt.size = 0.1) + NoLegend()

The problem is that the overall counts for most of the epitopes are too low for a proper normalization and analysis

FeatureScatter(CITE_GE_HTO_010, feature1 = "rna_FUT4" , feature2 =  "epi_CD15", slot = "counts" )

FeatureScatter(CITE_GE_HTO_010, feature1 = "epi_CD4" , feature2 =  "epi_CD8" )

FeatureScatter(CITE_GE_HTO_010, feature1 = "rna_CD4" , feature2 =  "rna_CD8B" )

RidgePlot(CITE_GE_HTO_010, features = c("epi_CD3", "epi_CD11c", "epi_CD8", "epi_CD16"), ncol = 2)
Picking joint bandwidth of 0.127
Picking joint bandwidth of 0.0948
Picking joint bandwidth of 0.168
Picking joint bandwidth of 0.14

RidgePlot(CITE_GE_HTO_010,assay= "Epi", features = c("CD19","CD3","CD16","CD4","CD11c","CD56-NCAM","CD14","CD8","CD45","CD34","CD15" ), idents = c("0", "1", "2", "3", "5", "6"), ncol = 3)
Picking joint bandwidth of 0.0974
Picking joint bandwidth of 0.0865
Picking joint bandwidth of 0.0835
Picking joint bandwidth of 0.0766
Picking joint bandwidth of 0.0841
Picking joint bandwidth of 0.0846
Picking joint bandwidth of 0.0877
Picking joint bandwidth of 0.095
Picking joint bandwidth of 0.0829
Picking joint bandwidth of 0.0577
Picking joint bandwidth of 0.0525

Clustering on Protein Level

DefaultAssay(CITE_GE_HTO_010) <- "Epi"
CITE_GE_HTO_010 <- RunPCA(CITE_GE_HTO_010, features = rownames(CITE_GE_HTO_010), reduction.name = "pca_epi", reduction.key = "pca_Epi_", 
    verbose = FALSE)
Warning in irlba(A = t(x = object), nv = npcs, ...): You're computing too large
a percentage of total singular values, use a standard svd instead.
Warning in irlba(A = t(x = object), nv = npcs, ...): did not converge--results
might be invalid!; try increasing work or maxit
Warning: Keys should be one or more alphanumeric characters followed by an
underscore, setting key from pca_Epi_ to pcaEpi_
Warning: All keys should be one or more alphanumeric characters followed by an
underscore '_', setting key to pcaEpi_
DimPlot(CITE_GE_HTO_010, reduction = "pca_epi")

epi.data <- GetAssayData(CITE_GE_HTO_010, slot = "data")
epi.dist <- dist(t(epi.data))
CITE_GE_HTO_010[["rnaClusterID"]] <- Idents(CITE_GE_HTO_010)
CITE_GE_HTO_010[["tsne_epi"]] <- RunTSNE(epi.dist, assay = "Epi", reduction.key = "epiTSNE_")
CITE_GE_HTO_010[["umap_epi"]] <- RunUMAP(epi.dist, assay = "Epi", reduction.key = "epiUMAP_")
16:52:50 UMAP embedding parameters a = 0.9922 b = 1.112
16:52:50 Read 6505 rows
16:52:50 Using Annoy for neighbor search, n_neighbors = 30
16:52:58 Commencing smooth kNN distance calibration using 1 thread
16:52:59 Initializing from normalized Laplacian + noise
16:52:59 Commencing optimization for 500 epochs, with 273140 positive edges
16:53:22 Optimization finished
CITE_GE_HTO_010[["epi_snn"]] <- FindNeighbors(epi.dist)$snn
Building SNN based on a provided distance matrix
Computing SNN
Warning: Adding a Graph without an assay associated with it
CITE_GE_HTO_010 <- FindClusters(CITE_GE_HTO_010, resolution = 0.2, graph.name = "epi_snn")
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 6505
Number of edges: 197891

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9086
Number of communities: 8
Elapsed time: 0 seconds
Warning: Adding a command log without an assay associated with it
CITE_GE_HTO_010@meta.data$Epi_cluster <-CITE_GE_HTO_010@active.ident
clustering.table <- table(Idents(CITE_GE_HTO_010), CITE_GE_HTO_010$rnaClusterID)
clustering.table
   
      0   1   2   3   4   5   6   7   8   9  10  11
  0 508 542 365 344 313  54 164 114   4  80   4   2
  1 554 460 318 269 239  34 130 115   6  55   7   5
  2 182 156 106  66  49  90  33  32  53   9  12  12
  3  50  30  14   7   7 381   6  16   2   3   1  12
  4   7   6   4   3   3   2   3   1 142   0   1   0
  5  21  29  22  10  12  16   7   4   8   1   0   0
  6   4   2   0   2   1   1   0   0  41   0  55   0
  7  21   7  11   3   3  25   5   7   0   0   0   0
DimPlot(CITE_GE_HTO_010, reduction = "umap_epi", pt.size = 0.5, label = TRUE, label.size = 10) + NoLegend()

This clustering outcome is not really useful as we barely have distinct clusters and the largest two clusters are dominant due to the absence of epitope markers:

RidgePlot(CITE_GE_HTO_010,assay= "Epi", features = c("CD19","CD3","CD16","CD4","CD11c","CD56-NCAM","CD14","CD8","CD45","CD34","CD15" ,"HL60-R1-H3" ), ncol = 3)
Warning: Could not find HL60-R1-H3 in the default search locations, found in HTO
assay instead
Picking joint bandwidth of 0.104
Picking joint bandwidth of 0.135
Picking joint bandwidth of 0.137
Picking joint bandwidth of 0.102
Picking joint bandwidth of 0.116
Picking joint bandwidth of 0.12
Picking joint bandwidth of 0.129
Picking joint bandwidth of 0.158
Picking joint bandwidth of 0.118
Picking joint bandwidth of 0.134
Picking joint bandwidth of 0.146
Picking joint bandwidth of 0.0677

The overall epitope counts also do not similar the epitope counts from the cytof experiment:

Y89DiCD45 27.98% Sm147DiCD11c 36.80% Gd158DiCD34 9.75% Yb173DiCD56 9.41% Ho165DiCD3 6.44% Lu175DiCD15 3.60% Nd142DiCD19 3.11% Nd144DiCD16 1.25% Gd160DiCD14 1.64%

The median counts of each epitope show the same picture:

library(matrixStats)

Attaching package: 'matrixStats'
The following object is masked from 'package:dplyr':

    count
rownames(CITE_GE_HTO_010@assays$Epi@counts)
 [1] "CD19"      "CD3"       "CD16"      "CD4"       "CD11c"     "CD56-NCAM"
 [7] "CD14"      "CD8"       "CD45"      "CD34"      "CD15"     
rowMedians(as.matrix(CITE_GE_HTO_010@assays$Epi@counts))
 [1]   2   6   5   4  22   4   2   1   3  18 135

As HL60 was not actively labelled in the experimental procedure these Epitope counts are not expected and show the large antibody diffusion that happened between mixing and library preparation

HL60_CITE <- subset(CITE_GE_HTO_010, subset = Sample == "HL60_R1_H3")
x <- as.array(HL60_CITE@assays$Epi@counts)
barplot(rowSums(x))

saveRDS(CITE_GE_HTO_010, file = "./Pat1_HL60_CITE.rds")
saveRDS(CITE_GE_HTO_011, file = "./Pat1_HL60_control.rds")